# Launch the vLLM OpenAI-compatible API server for the bge-reranker-v2-m3 model.
#
# - CUDA_VISIBLE_DEVICES=1 is a per-command environment assignment, so it
#   applies only to this python process (only CUDA device index 1 is visible).
# - The model is loaded from a local directory and served under the name
#   "bge-reranker-v2-m3" on port 10527.
# - The remaining flags (--task score, --block-size 16, --dtype float32,
#   --trust-remote-code, --enable-prefix-caching) are passed to vLLM unchanged;
#   NOTE(review): their exact semantics depend on the installed vLLM version —
#   confirm against its engine-argument documentation before changing them.
CUDA_VISIBLE_DEVICES=1 \
  python -m vllm.entrypoints.openai.api_server \
  --served-model-name bge-reranker-v2-m3 \
  --model /home/LB14787_linux/jinke/models/bge-reranker-v2-m3-2G \
  --port 10527 \
  --task score \
  --block-size 16 \
  --dtype float32 \
  --trust-remote-code \
  --enable-prefix-caching
